library(rtracklayer)
library(data.table)
library(tidyverse)

# Convert UCSC rmsk track to BED ----------
# Reads a UCSC rmsk table dump (columns auto-named V1, V2, ...), keeps a
# BED-like subset of columns, and writes two tab-separated, header-less files:
# every retained repeat, and the high-scoring LTR subset.
#
# NOTE(review): the input file is the mm39 rmsk table, while the objects and
# the output files are labelled hg38 -- confirm which assembly is intended.
# Object names and paths are deliberately left unchanged here.
hg38.rmsk <- fread('~/learn/erv_quant/scte_legacy/mm39.rmsk.txt')

# Keep only the renamed columns (.keep = 'none' drops everything else), in
# the order chr, begin, end, id, sw.score, direction, type.
# The nchar() filter keeps sequence names shorter than 6 characters;
# presumably this drops alt/random/unplaced scaffolds -- verify against the
# names actually present in the table.
hg38.rmsk.chr <- hg38.rmsk |>
  mutate(chr = V6, begin = V7, end = V8, id = V11, sw.score = V2,
         direction = V10, type = V12, .keep = 'none') |>
  filter(nchar(chr) < 6)

# All retained repeats. Spell out FALSE: `F` is an ordinary variable that can
# be reassigned, `FALSE` is a reserved word.
hg38.rmsk.chr |>
  fwrite('~/learn/erv_quant/scte_legacy/hg38.rmsk.chr.bed', col.names = FALSE,
         sep = '\t')

# LTR-type repeats with sw.score strictly above 2000.
hg38.rmsk.chr |>
  filter(sw.score > 2000, type == 'LTR') |>
  fwrite('~/learn/erv_quant/scte_legacy/hg38.rmsk.ltr.bed', col.names = FALSE,
         sep = '\t')

# Compare published ERVs against the B6 hits ----------
# NOTE(review): `b6.out` is not created in this script; it must already exist
# in the session. Presumably it is the parsed RepeatMasker .out table for the
# B6 assembly -- confirm.
pub.erv <- read_csv('mission/fdx1/publication.erv.csv')

# Rows of b6.out whose V10 appears among the published V11 values, counted
# per V11 group of b6.out.
b6.erv <- b6.out |>
  filter(V10 %in% pub.erv$V11) |>
  summarise(n(), .by = V11)

# Published entries that have no match in b6.out.
# The lookup goes through b6.out$V10, the column matched above: b6.erv only
# holds V11 and the count, so b6.erv$V10 is NULL and filtering against it
# returned every row of pub.erv regardless of whether it was matched.
pub.erv |>
  filter(!(V11 %in% b6.out$V10))

# Re-export the B6 RepeatMasker GFF ----------
# Load the .fa.out.gff annotation with rtracklayer, preview it, and write it
# back out under a .gff2 file name.
b6.gff.grange <- import.gff(
  '~/learn/erv_quant/Mus_musculus_c57bl6nj.C57BL_6NJ_v1.dna_sm.toplevel.fa.out.gff'
)

# Quick look at the first records.
head(b6.gff.grange)

# NOTE(review): no explicit format is passed, so the output format presumably
# follows from the .gff2 extension -- confirm against the rtracklayer docs.
export(b6.gff.grange, '~/learn/erv_quant/mm.b6.toplevel.fa.out.gff2')



